# Load the coreset training data (binary outcome in the `class` column)
coreset_df <- read.csv('../data/coreset_train.csv')
# Print the data frame for a quick visual sanity check
coreset_df
# Class balance — heavily imbalanced (267 vs 4 per the output below)
table(coreset_df$class)
0 1
267 4
library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
── Attaching packages ───────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
✓ ggplot2 3.3.2 ✓ purrr 0.3.4
✓ tibble 3.0.3 ✓ dplyr 1.0.1
✓ tidyr 1.1.1 ✓ stringr 1.4.0
✓ readr 1.3.1 ✓ forcats 0.5.0
── Conflicts ──────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
x dplyr::filter() masks stats::filter()
x dplyr::lag() masks stats::lag()
library(caret)
Loading required package: lattice
Registered S3 method overwritten by 'data.table':
method from
print.data.table
Attaching package: ‘caret’
The following object is masked from ‘package:purrr’:
lift
library(GGally)
Registered S3 method overwritten by 'GGally':
method from
+.gg ggplot2
library(ggplot2)
library(corrplot)
corrplot 0.84 loaded
library(bayesplot)
This is bayesplot version 1.7.2
- Online documentation and vignettes at mc-stan.org/bayesplot
- bayesplot theme set to bayesplot::theme_default()
* Does _not_ affect other ggplot2 plots
* See ?bayesplot_theme_set for details on theme setting
theme_set(bayesplot::theme_default(base_family = "sans"))
library(rstanarm)
Loading required package: Rcpp
Registered S3 methods overwritten by 'htmltools':
method from
print.html tools:rstudio
print.shiny.tag tools:rstudio
print.shiny.tag.list tools:rstudio
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
This is rstanarm version 2.21.1
- See https://mc-stan.org/rstanarm/articles/priors for changes to default priors!
- Default priors may change, so it's safest to specify priors, even if equivalent to the defaults.
- For execution on a local, multicore CPU with excess RAM we recommend calling
options(mc.cores = parallel::detectCores())
Attaching package: ‘rstanarm’
The following objects are masked from ‘package:caret’:
compare_models, R2
options(mc.cores = parallel::detectCores())
library(loo)
This is loo version 2.3.1
- Online documentation and vignettes at mc-stan.org/loo
- As of v2.0.0 loo defaults to 1 core but we recommend using as many as possible. Use the 'cores' argument or set options(mc.cores = NUM_CORES) for an entire session.
library(projpred)
This is projpred version 1.1.6.
# Global seed constant for reproducibility
SEED <- 42
# Weakly-informative Student-t prior used for both coefficients and intercept
t_prior <- student_t(df = 7, location = 0, scale = 2.5)
# Full Bayesian logistic regression on all predictors of the coreset.
# Uses the SEED constant defined above (the original passed a duplicate
# literal 42, which could drift out of sync with SEED).
post_full_coreset <- stan_glm(class ~ ., data = coreset_df,
                              family = binomial(link = "logit"),
                              prior = t_prior, prior_intercept = t_prior,
                              cores = 4, seed = SEED)
starting worker pid=18038 on localhost:11740 at 15:12:59.396
starting worker pid=18052 on localhost:11740 at 15:12:59.787
starting worker pid=18066 on localhost:11740 at 15:13:00.134
starting worker pid=18080 on localhost:11740 at 15:13:00.556
SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 1).
Chain 1:
Chain 1: Gradient evaluation took 0.001639 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 16.39 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1:
Chain 1:
Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 2).
Chain 2:
Chain 2: Gradient evaluation took 0.000175 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 1.75 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2:
Chain 2:
Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 3).
Chain 3:
Chain 3: Gradient evaluation took 0.000225 seconds
Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 2.25 seconds.
Chain 3: Adjust your expectations accordingly!
Chain 3:
Chain 3:
Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 4).
Chain 4:
Chain 4: Gradient evaluation took 0.000175 seconds
Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 1.75 seconds.
Chain 4: Adjust your expectations accordingly!
Chain 4:
Chain 4:
Chain 4: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 4: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 4: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 4: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 4: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 4: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 4: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 4: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 4: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 4: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 4: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 4:
Chain 4: Elapsed Time: 7.87206 seconds (Warm-up)
Chain 4: 4.67815 seconds (Sampling)
Chain 4: 12.5502 seconds (Total)
Chain 4:
Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 1:
Chain 1: Elapsed Time: 8.30372 seconds (Warm-up)
Chain 1: 7.77778 seconds (Sampling)
Chain 1: 16.0815 seconds (Total)
Chain 1:
Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 2:
Chain 2: Elapsed Time: 8.08773 seconds (Warm-up)
Chain 2: 7.83582 seconds (Sampling)
Chain 2: 15.9235 seconds (Total)
Chain 2:
Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 3:
Chain 3: Elapsed Time: 8.28147 seconds (Warm-up)
Chain 3: 7.70233 seconds (Sampling)
Chain 3: 15.9838 seconds (Total)
Chain 3:
summary(post_full_coreset)
Model Info:
function: stan_glm
family: binomial [logit]
formula: class ~ .
algorithm: sampling
sample: 4000 (posterior sample size)
priors: see help('prior_summary')
observations: 271
predictors: 65
Estimates:
mean sd 10% 50% 90%
(Intercept) -11.9 2.5 -15.2 -11.6 -8.9
Attr1 -0.2 2.7 -3.6 -0.2 3.1
Attr2 0.0 2.8 -3.4 0.0 3.5
Attr3 1.4 1.4 -0.3 1.4 3.2
Attr4 -0.4 2.8 -3.8 -0.3 3.0
Attr5 0.0 3.0 -3.7 0.0 3.6
Attr6 1.2 2.7 -2.1 1.0 4.5
Attr7 0.0 3.0 -3.6 -0.1 3.5
Attr8 -0.6 2.6 -3.9 -0.5 2.6
Attr9 0.4 1.9 -2.0 0.5 2.9
Attr10 0.1 2.8 -3.3 0.2 3.6
Attr11 0.2 2.1 -2.4 0.1 2.9
Attr12 0.4 2.7 -2.9 0.3 3.8
Attr13 -0.1 3.1 -3.9 0.0 3.6
Attr14 -0.1 2.9 -3.6 -0.1 3.4
Attr15 -1.0 1.6 -3.1 -1.1 1.1
Attr16 -1.2 2.4 -4.3 -1.1 1.8
Attr17 -0.6 2.5 -3.9 -0.5 2.5
Attr18 -0.1 3.1 -3.8 0.0 3.6
Attr19 -0.1 2.7 -3.5 -0.1 3.2
Attr20 1.0 2.8 -2.3 0.9 4.4
Attr21 0.0 3.0 -3.4 0.0 3.5
Attr22 -0.6 2.4 -3.6 -0.6 2.3
Attr23 0.1 2.8 -3.3 0.0 3.5
Attr24 1.8 2.3 -0.8 1.5 4.6
Attr25 0.6 2.5 -2.5 0.5 3.7
Attr26 -0.8 2.4 -3.7 -0.8 2.1
Attr27 0.0 3.1 -3.5 0.0 3.7
Attr28 -0.2 3.0 -3.7 -0.2 3.2
Attr29 1.2 1.0 0.0 1.2 2.5
Attr30 0.3 2.8 -3.0 0.3 3.7
Attr31 -0.2 2.9 -3.8 -0.1 3.4
Attr32 1.1 3.0 -2.5 0.9 4.7
Attr33 -1.3 2.7 -4.6 -1.0 1.8
Attr34 1.4 1.8 -0.8 1.4 3.6
Attr35 -1.2 2.0 -3.9 -1.2 1.3
Attr36 -1.8 1.8 -4.3 -1.6 0.4
Attr37 -1.0 2.6 -4.2 -0.8 2.0
Attr38 -0.2 2.9 -3.6 -0.2 3.3
Attr39 -0.1 2.8 -3.5 0.0 3.3
Attr40 0.3 2.6 -2.9 0.3 3.5
Attr41 -0.5 0.7 -1.3 -0.6 0.5
Attr42 0.1 2.8 -3.2 0.1 3.4
Attr43 0.5 2.8 -2.9 0.5 3.8
Attr44 -0.3 2.6 -3.6 -0.2 2.8
Attr45 0.1 3.0 -3.5 0.1 3.6
Attr46 -0.1 2.8 -3.5 0.0 3.3
Attr47 0.2 2.8 -3.2 0.2 3.5
Attr48 0.7 2.1 -1.9 0.6 3.4
Attr49 0.1 2.8 -3.2 0.1 3.5
Attr50 -0.4 2.2 -3.2 -0.3 2.4
Attr51 2.5 1.4 0.8 2.5 4.3
Attr52 1.0 2.8 -2.3 0.8 4.4
Attr53 -0.8 2.3 -3.8 -0.7 1.9
Attr54 -0.2 2.8 -3.5 -0.2 3.1
Attr55 -1.8 1.1 -3.3 -1.8 -0.4
Attr56 0.5 2.7 -2.8 0.4 3.9
Attr57 -0.4 1.9 -2.9 -0.3 2.0
Attr58 0.3 2.8 -3.1 0.3 3.8
Attr59 -0.8 2.9 -4.4 -0.6 2.6
Attr60 -0.1 2.9 -3.6 -0.1 3.2
Attr61 -0.9 2.4 -4.0 -0.7 2.0
Attr62 1.1 3.0 -2.4 0.9 4.7
Attr63 -1.2 2.7 -4.5 -1.0 2.1
Attr64 -0.4 2.9 -3.8 -0.4 3.0
Fit Diagnostics:
mean sd 10% 50% 90%
mean_PPD 0.0 0.0 0.0 0.0 0.0
The mean_ppd is the sample average posterior predictive distribution of the outcome variable (for details see help('summary.stanreg')).
MCMC diagnostics
mcse Rhat n_eff
(Intercept) 0.1 1.0 2178
Attr1 0.0 1.0 5635
Attr2 0.0 1.0 8509
Attr3 0.0 1.0 2805
Attr4 0.0 1.0 6429
Attr5 0.0 1.0 7129
Attr6 0.0 1.0 4202
Attr7 0.0 1.0 6146
Attr8 0.0 1.0 4713
Attr9 0.0 1.0 3832
Attr10 0.0 1.0 7036
Attr11 0.0 1.0 3985
Attr12 0.0 1.0 4648
Attr13 0.0 1.0 7755
Attr14 0.0 1.0 6985
Attr15 0.0 1.0 4448
Attr16 0.0 1.0 3133
Attr17 0.0 1.0 5694
Attr18 0.0 1.0 5717
Attr19 0.0 1.0 6102
Attr20 0.0 1.0 5243
Attr21 0.0 1.0 5839
Attr22 0.0 1.0 5311
Attr23 0.0 1.0 5737
Attr24 0.0 1.0 3037
Attr25 0.0 1.0 4864
Attr26 0.0 1.0 4338
Attr27 0.0 1.0 6827
Attr28 0.0 1.0 5539
Attr29 0.0 1.0 3611
Attr30 0.0 1.0 6520
Attr31 0.0 1.0 5564
Attr32 0.0 1.0 5555
Attr33 0.0 1.0 5013
Attr34 0.0 1.0 4016
Attr35 0.0 1.0 4892
Attr36 0.0 1.0 3357
Attr37 0.0 1.0 4758
Attr38 0.0 1.0 6104
Attr39 0.0 1.0 5848
Attr40 0.0 1.0 5272
Attr41 0.0 1.0 2429
Attr42 0.0 1.0 5358
Attr43 0.0 1.0 5218
Attr44 0.0 1.0 4659
Attr45 0.0 1.0 5289
Attr46 0.0 1.0 5811
Attr47 0.0 1.0 6313
Attr48 0.0 1.0 4475
Attr49 0.0 1.0 5347
Attr50 0.0 1.0 4523
Attr51 0.0 1.0 2461
Attr52 0.0 1.0 4553
Attr53 0.0 1.0 4249
Attr54 0.0 1.0 5930
Attr55 0.0 1.0 3499
Attr56 0.0 1.0 5630
Attr57 0.0 1.0 3997
Attr58 0.0 1.0 5521
Attr59 0.0 1.0 6011
Attr60 0.0 1.0 5062
Attr61 0.0 1.0 5236
Attr62 0.0 1.0 7156
Attr63 0.0 1.0 4899
Attr64 0.0 1.0 5580
mean_PPD 0.0 1.0 4274
log-posterior 0.2 1.0 1499
For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1).
prior_summary(post_full_coreset)
Priors for model 'post_full_coreset'
------
Intercept (after predictors centered)
~ student_t(df = 7, location = 0, scale = 2.5)
Coefficients
~ student_t(df = [7,7,7,...], location = [0,0,0,...], scale = [2.5,2.5,2.5,...])
------
See help('prior_summary.stanreg') for more details
pp_check(post_full_coreset, "dens_overlay")

pp_check(post_full_coreset, "stat")

Test now
bankruptcy_test <- read_csv('../data/bankruptcy_test_am.csv')
Missing column names filled in: 'X1' [1]Parsed with column specification:
cols(
.default = col_double()
)
See spec(...) for full column specifications.
bankruptcy_test <- select(bankruptcy_test, -X1)
bankruptcy_test
table(bankruptcy_test$class)
0 1
2105 47
bankruptcy_test_x <- select(bankruptcy_test, -class)
dim(bankruptcy_test_x)
[1] 2152 64
test_pred <- posterior_predict(post_full_coreset, newdata = bankruptcy_test_x)
dim(test_pred)
[1] 4000 2152
hist(test_pred)

# Posterior predictive mean per test row = estimated P(class = 1)
pred <- colMeans(test_pred)
# Hard 0/1 classification at the 0.5 threshold.
# NOTE(review): with ~2% positives in the test set, 0.5 may be far from the
# optimal operating point — confirm the threshold choice is deliberate.
pr <- as.integer(pred >= 0.5)
table(pr)
pr
0 1
2139 13
true_pr <- bankruptcy_test$class
table(true_pr)
true_pr
0 1
2105 47
table(true_pr, pr)
pr
true_pr 0 1
0 2093 12
1 46 1
# Bayesian R^2 for a stanreg fit: one R^2 value per posterior draw,
# computed as var(mu) / (var(mu) + residual variance).
#
# @param fit A fitted rstanarm `stanreg` model.
# @return Numeric vector, one R^2 per posterior sample.
bayes_R2 <- function(fit) {
  # posterior_epred() supersedes the deprecated
  # posterior_linpred(..., transform = TRUE) (see rstanarm warning).
  mupred <- rstanarm::posterior_epred(fit)
  # Variance of the expected outcome across observations, per draw
  var_mupred <- apply(mupred, 1, var)
  # Use the outcome stored on the fit instead of relying on a global `y`
  # (the original read `y` from the calling environment).
  y <- fit$y
  if (family(fit)$family == "binomial" && NCOL(y) == 1) {
    # Bernoulli residual variance: mean of p * (1 - p) per draw
    sigma2 <- apply(mupred * (1 - mupred), 1, mean)
  } else {
    sigma2 <- as.matrix(fit, pars = c("sigma"))^2
  }
  var_mupred / (var_mupred + sigma2)
}
y <- coreset_df$class
round(median(bayesR2<-bayes_R2(post_full_coreset)), 2)
Instead of posterior_linpred(..., transform=TRUE) please call posterior_epred(), which provides equivalent functionality.
[1] 0.58
pxl<-xlim(0,1)
mcmc_hist(data.frame(bayesR2), binwidth=0.02) + pxl +
scale_y_continuous(breaks=NULL) +
xlab('Bayesian R2') +
geom_vline(xintercept=median(bayesR2))
Scale for 'y' is already present. Adding another scale for 'y', which will replace the
existing scale.

library(MLmetrics)
Attaching package: ‘MLmetrics’
The following objects are masked from ‘package:caret’:
MAE, RMSE
The following object is masked from ‘package:base’:
Recall
ConfusionMatrix(pr, true_pr)
y_pred
y_true 0 1
0 2093 12
1 46 1
# NOTE(review): MLmetrics::Precision defaults `positive` to the first level
# ("0" here), so this reports precision for the majority (non-bankrupt)
# class; pass positive = "1" to score bankruptcy detection — confirm intent.
Precision(true_pr, pr)
[1] 0.9784946
Recall(true_pr, pr)
[1] 0.9942993
Plotting and analysing
plot(post_full_coreset, "areas")

coef(post_full_coreset)
(Intercept) Attr1 Attr2 Attr3 Attr4 Attr5
-1.159387e+01 -2.087937e-01 4.375479e-02 1.381348e+00 -3.403171e-01 1.576582e-02
Attr6 Attr7 Attr8 Attr9 Attr10 Attr11
1.024552e+00 -7.899878e-02 -5.134149e-01 4.549746e-01 1.586445e-01 1.495039e-01
Attr12 Attr13 Attr14 Attr15 Attr16 Attr17
3.083649e-01 -4.576755e-02 -6.845048e-02 -1.092996e+00 -1.080192e+00 -4.828847e-01
Attr18 Attr19 Attr20 Attr21 Attr22 Attr23
-3.177100e-02 -8.394860e-02 8.560833e-01 -8.891445e-04 -5.672796e-01 -5.624404e-03
Attr24 Attr25 Attr26 Attr27 Attr28 Attr29
1.476013e+00 5.079452e-01 -7.561422e-01 1.222525e-02 -1.933720e-01 1.187093e+00
Attr30 Attr31 Attr32 Attr33 Attr34 Attr35
3.305826e-01 -1.048468e-01 8.724477e-01 -1.037943e+00 1.374382e+00 -1.158643e+00
Attr36 Attr37 Attr38 Attr39 Attr40 Attr41
-1.643886e+00 -8.025335e-01 -1.700301e-01 -1.689906e-02 2.818221e-01 -5.967309e-01
Attr42 Attr43 Attr44 Attr45 Attr46 Attr47
5.361213e-02 4.562551e-01 -2.498703e-01 7.502764e-02 -1.703349e-02 1.811780e-01
Attr48 Attr49 Attr50 Attr51 Attr52 Attr53
5.527201e-01 9.417702e-02 -3.081719e-01 2.463733e+00 8.397935e-01 -6.800738e-01
Attr54 Attr55 Attr56 Attr57 Attr58 Attr59
-2.268086e-01 -1.752727e+00 3.935452e-01 -2.584169e-01 3.493494e-01 -6.483292e-01
Attr60 Attr61 Attr62 Attr63 Attr64
-9.284951e-02 -7.445959e-01 9.152790e-01 -1.018643e+00 -3.613087e-01
posterior_interval(post_full_coreset)
5% 95%
(Intercept) -16.5819765 -8.25334727
Attr1 -4.6944816 4.25091821
Attr2 -4.3239556 4.55609703
Attr3 -0.7307162 3.72894171
Attr4 -4.8744037 3.98039460
Attr5 -4.8331070 4.83294700
Attr6 -3.0882285 5.82097875
Attr7 -4.6496339 4.63894533
Attr8 -4.9916277 3.56914225
Attr9 -2.7023824 3.63238631
Attr10 -4.3906836 4.63475729
Attr11 -3.0090301 3.69660666
Attr12 -3.8485979 5.06645190
Attr13 -5.1378647 4.79021074
Attr14 -4.8735712 4.65483123
Attr15 -3.5893669 1.73614905
Attr16 -5.3022113 2.60623610
Attr17 -5.0631693 3.32997329
Attr18 -5.1837027 4.90156905
Attr19 -4.5233812 4.29279433
Attr20 -3.3598899 5.64601174
Attr21 -4.4998512 4.63264020
Attr22 -4.6176297 3.16376657
Attr23 -4.3132488 4.68909028
Attr24 -1.3206109 5.88015354
Attr25 -3.3661084 4.72937436
Attr26 -4.6674899 2.89053127
Attr27 -4.7111418 4.78099682
Attr28 -5.0435758 4.53297016
Attr29 -0.3455968 2.88143317
Attr30 -3.8822444 4.96812770
Attr31 -4.8359562 4.42422322
Attr32 -3.5555204 6.25742879
Attr33 -5.8224550 2.72101330
Attr34 -1.5265292 4.21545005
Attr35 -4.6950274 2.00022730
Attr36 -4.9929539 0.93185739
Attr37 -5.4662661 2.89055281
Attr38 -4.8896395 4.40300141
Attr39 -4.7356877 4.57536114
Attr40 -3.9563963 4.52584355
Attr41 -1.4350350 0.85267110
Attr42 -4.3623511 4.45417932
Attr43 -3.9328080 4.96118556
Attr44 -4.5767794 3.81244108
Attr45 -4.6079412 4.88442936
Attr46 -4.7265358 4.36572434
Attr47 -4.1053944 4.63497728
Attr48 -2.6321474 4.27977621
Attr49 -4.2823683 4.60898125
Attr50 -4.1533470 3.16280355
Attr51 0.3160842 4.88517669
Attr52 -3.2753745 5.51773210
Attr53 -4.8797454 2.72847363
Attr54 -4.6567941 4.16568441
Attr55 -3.8132586 -0.08295383
Attr56 -3.8621260 5.04418630
Attr57 -3.7569650 2.61040989
Attr58 -4.1395681 4.96649195
Attr59 -5.7716735 3.72474766
Attr60 -4.8356731 4.36907349
Attr61 -5.0413434 2.84358910
Attr62 -3.5429171 6.14524241
Attr63 -5.7885882 2.99430012
Attr64 -4.9843490 4.20072189
Bayesian Variable Selection
vs_coreset <- varsel(post_full_coreset, method='forward')
Instead of posterior_linpred(..., transform=TRUE) please call posterior_epred(), which provides equivalent functionality.
glm_ridge warning: maximum number of line search iterations reached. The optimization can be ill-behaved.
glm_ridge warning: maximum number of line search iterations reached. The optimization can be ill-behaved.
glm_ridge warning: maximum number of line search iterations reached. The optimization can be ill-behaved.
glm_ridge warning: maximum number of line search iterations reached. The optimization can be ill-behaved.
glm_ridge warning: maximum number of line search iterations reached. The optimization can be ill-behaved.
glm_ridge warning: maximum number of line search iterations reached. The optimization can be ill-behaved.
vs_coreset$vind
Attr55 Attr43 Attr10 Attr29 Attr34 Attr41 Attr62 Attr15 Attr64 Attr25 Attr35 Attr51 Attr2
55 43 10 29 34 41 62 15 64 25 35 51 2
Attr3 Attr30 Attr40 Attr13 Attr60 Attr24 Attr61
3 30 40 13 60 24 61
loo
lpost1 <- readRDS("../model/post1.rds")
loo(lpost1, post_full_coreset)
all scheduled cores encountered errors in user codeError in FUN(X[[i]], ...) : subscript out of bounds
Apply neural network to Coreset
Pre-process - factor and scale data
training_data <- read_csv('../data/coreset_train.csv')
Parsed with column specification:
cols(
.default = col_double()
)
See spec(...) for full column specifications.
head(training_data)
test_data <- select(read_csv('../data/bankruptcy_test_am.csv'), -X1)
Missing column names filled in: 'X1' [1]Parsed with column specification:
cols(
.default = col_double()
)
See spec(...) for full column specifications.
head(test_data)
# Min-max scale a numeric vector onto [0, 1].
# Note: yields NaN for a constant vector (max == min), as in the original.
normalize <- function(x) {
  rng <- range(x)
  (x - rng[1]) / (rng[2] - rng[1])
}
# Min-max scale every column of train and test independently.
# NOTE(review): the test set is normalized with its OWN per-column min/max,
# not the training set's — train and test scales will not match; confirm
# this is intended. Also, any constant column produces NaN (division by 0).
training_data_pp <- as.data.frame(lapply(training_data, normalize))
test_data_pp <- as.data.frame(lapply(test_data, normalize))
Neural Network Classifier
library('neuralnet')
nn <- neuralnet(class ~ ., data=training_data_pp, hidden=c(32,16,8,4), linear.output=FALSE, threshold=0.01)
plot(nn)

nn.results <- compute(nn, test_data_pp)
results <- data.frame(actual = test_data_pp$class, prediction = nn.results$net.result)
results
# Round predicted probabilities to hard 0/1 labels
roundedresults<-sapply(results,round,digits=0)
roundedresultsdf=data.frame(roundedresults)
# NOTE(review): attach() is discouraged — re-running this chunk stacks
# copies on the search path (see the masking warnings below); prefer
# with(roundedresultsdf, table(actual, prediction)).
attach(roundedresultsdf)
The following objects are masked from roundedresultsdf (pos = 3):
actual, prediction
The following objects are masked from roundedresultsdf (pos = 4):
actual, prediction
The following objects are masked from roundedresultsdf (pos = 13):
actual, prediction
table(actual,prediction)
prediction
actual 0
0 2105
1 47
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3J9CmNvcmVzZXRfZGYgPC0gcmVhZC5jc3YoJy4uL2RhdGEvY29yZXNldF90cmFpbi5jc3YnKQpjb3Jlc2V0X2RmCmBgYAoKYGBge3J9CnRhYmxlKGNvcmVzZXRfZGYkY2xhc3MpCmBgYAoKCmBgYHtyfQpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeShjYXJldCkKbGlicmFyeShHR2FsbHkpCmxpYnJhcnkoZ2dwbG90MikKbGlicmFyeShjb3JycGxvdCkKbGlicmFyeShiYXllc3Bsb3QpCnRoZW1lX3NldChiYXllc3Bsb3Q6OnRoZW1lX2RlZmF1bHQoYmFzZV9mYW1pbHkgPSAic2FucyIpKQpsaWJyYXJ5KHJzdGFuYXJtKQpvcHRpb25zKG1jLmNvcmVzID0gcGFyYWxsZWw6OmRldGVjdENvcmVzKCkpCmxpYnJhcnkobG9vKQpsaWJyYXJ5KHByb2pwcmVkKQpTRUVEPTQyCmBgYAoKCmBgYHtyfQp0X3ByaW9yIDwtIHN0dWRlbnRfdChkZiA9IDcsIGxvY2F0aW9uID0gMCwgc2NhbGUgPSAyLjUpCmBgYAoKYGBge3J9CnBvc3RfZnVsbF9jb3Jlc2V0IDwtIHN0YW5fZ2xtKGNsYXNzIH4gLiAsIGRhdGEgPSBjb3Jlc2V0X2RmLAogICAgICAgICAgICAgICAgIGZhbWlseSA9IGJpbm9taWFsKGxpbmsgPSAibG9naXQiKSwgCiAgICAgICAgICAgICAgICAgcHJpb3IgPSB0X3ByaW9yLCBwcmlvcl9pbnRlcmNlcHQgPSB0X3ByaW9yLAogICAgICAgICAgICAgICAgIGNvcmVzPTQsIHNlZWQgPSA0MikKYGBgCgpgYGB7cn0Kc3VtbWFyeShwb3N0X2Z1bGxfY29yZXNldCkKYGBgCgpgYGB7cn0KcHJpb3Jfc3VtbWFyeShwb3N0X2Z1bGxfY29yZXNldCkKYGBgCgoKYGBge3J9CnBwX2NoZWNrKHBvc3RfZnVsbF9jb3Jlc2V0LCAiZGVuc19vdmVybGF5IikKYGBgCgpgYGB7cn0KcHBfY2hlY2socG9zdF9mdWxsX2NvcmVzZXQsICJzdGF0IikKYGBgCgojIyBUZXN0IG5vdwoKYGBge3J9CmJhbmtydXB0Y3lfdGVzdCA8LSByZWFkX2NzdignLi4vZGF0YS9iYW5rcnVwdGN5X3Rlc3RfYW0uY3N2JykKYmFua3J1cHRjeV90ZXN0IDwtIHNlbGVjdChiYW5rcnVwdGN5X3Rlc3QsIC1YMSkKYmFua3J1cHRjeV90ZXN0CmBgYApgYGB7cn0KdGFibGUoYmFua3J1cHRjeV90ZXN0JGNsYXNzKQpgYGAKCgpgYGB7cn0KYmFua3J1cHRjeV90ZXN0X3ggPC0gc2VsZWN0KGJhbmtydXB0Y3lfdGVzdCwgLWNsYXNzKQpkaW0oYmFua3J1cHRjeV90ZXN0X3gpCmBgYAoKCmBgYHtyfQp0ZXN0X3ByZWQgPC0gcG9zdGVyaW9yX3ByZWRpY3QocG9zdF9mdWxsX2NvcmVzZXQsIG5ld2RhdGEgPSBiYW5rcnVwdGN5X3Rlc3RfeCkKYGBgCgpgYGB7cn0KZGltKHRlc3RfcHJlZCkKaGlzdCh0ZXN0X3ByZWQpCmBgYAoKYGBge3J9CnByZWQgPC0gY29sTWVhbnModGVzdF9wcmVkKQpwciA8LSBhcy5pbnRlZ2VyKHByZWQgPj0gMC41KQpgYGAKCmBgYHtyfQp0YWJsZShwcikKYGBgCgpgYGB7cn0KdHJ1ZV9wciA8LSBiYW5rcnVwdGN5X3Rlc3QkY2xhc3MKdGFibGUodHJ1ZV9wcikKYGBgCgpgYGB7cn0KdGFibGUo
dHJ1ZV9wciwgcHIpCmBgYAoKYGBge3J9CmJheWVzX1IyIDwtIGZ1bmN0aW9uKGZpdCkgewogIG11cHJlZCA8LSByc3RhbmFybTo6cG9zdGVyaW9yX2xpbnByZWQoZml0LCB0cmFuc2Zvcm0gPSBUUlVFKQogIHZhcl9tdXByZWQgPC0gYXBwbHkobXVwcmVkLCAxLCB2YXIpCiAgaWYgKGZhbWlseShmaXQpJGZhbWlseSA9PSAiYmlub21pYWwiICYmIE5DT0woeSkgPT0gMSkgewogICAgICBzaWdtYTIgPC0gYXBwbHkobXVwcmVkKigxLW11cHJlZCksIDEsIG1lYW4pCiAgfSBlbHNlIHsKICAgICAgc2lnbWEyIDwtIGFzLm1hdHJpeChmaXQsIHBhcnMgPSBjKCJzaWdtYSIpKV4yCiAgfQogIHZhcl9tdXByZWQgLyAodmFyX211cHJlZCArIHNpZ21hMikKfQpgYGAKCgpgYGB7cn0KeSA8LSBjb3Jlc2V0X2RmJGNsYXNzCnJvdW5kKG1lZGlhbihiYXllc1IyPC1iYXllc19SMihwb3N0X2Z1bGxfY29yZXNldCkpLCAyKQpgYGAKCmBgYHtyfQpweGw8LXhsaW0oMCwxKQptY21jX2hpc3QoZGF0YS5mcmFtZShiYXllc1IyKSwgYmlud2lkdGg9MC4wMikgKyBweGwgKwogICAgc2NhbGVfeV9jb250aW51b3VzKGJyZWFrcz1OVUxMKSArCiAgICB4bGFiKCdCYXllc2lhbiBSMicpICsKICAgIGdlb21fdmxpbmUoeGludGVyY2VwdD1tZWRpYW4oYmF5ZXNSMikpCmBgYAoKCmBgYHtyfQpsaWJyYXJ5KE1MbWV0cmljcykKYGBgCgpgYGB7cn0KQ29uZnVzaW9uTWF0cml4KHByLCB0cnVlX3ByKQpgYGAKCmBgYHtyfQpQcmVjaXNpb24odHJ1ZV9wciwgcHIpCmBgYAoKYGBge3J9ClJlY2FsbCh0cnVlX3ByLCBwcikKYGBgCgojIyBQbG90dGluZyBhbmQgYW5hbHlzaW5nCmBgYHtyfQpwbG90KHBvc3RfZnVsbF9jb3Jlc2V0LCAiYXJlYXMiKQpgYGAKCmBgYHtyfQpjb2VmKHBvc3RfZnVsbF9jb3Jlc2V0KQpgYGAKCmBgYHtyfQpwb3N0ZXJpb3JfaW50ZXJ2YWwocG9zdF9mdWxsX2NvcmVzZXQpCmBgYAoKIyMgQmF5ZXNpYW4gVmFyaWFibGUgU2VsZWN0aW9uCmBgYHtyfQp2c19jb3Jlc2V0IDwtIHZhcnNlbChwb3N0X2Z1bGxfY29yZXNldCwgbWV0aG9kPSdmb3J3YXJkJykKYGBgCgpgYGB7cn0KdnNfY29yZXNldCR2aW5kCmBgYAoKCiMjIGxvbwoKYGBge3J9Cmxwb3N0MSA8LSByZWFkUkRTKCIuLi9tb2RlbC9wb3N0MS5yZHMiKQpgYGAKCmBgYHtyfQpsb28obHBvc3QxLCBwb3N0X2Z1bGxfY29yZXNldCkKYGBgCiMjIEFwcGx5IG5ldXJhbCBuZXR3b3JrIHRvIENvcmVzZXQKCiMjIyBQcmUtcHJvY2VzcyAtIGZhY3RvciBhbmQgc2NhbGUgZGF0YQoKCgpgYGB7cn0KdHJhaW5pbmdfZGF0YSA8LSByZWFkX2NzdignLi4vZGF0YS9jb3Jlc2V0X3RyYWluLmNzdicpCmhlYWQodHJhaW5pbmdfZGF0YSkKYGBgCmBgYHtyfQp0ZXN0X2RhdGEgPC0gc2VsZWN0KHJlYWRfY3N2KCcuLi9kYXRhL2JhbmtydXB0Y3lfdGVzdF9hbS5jc3YnKSwgLVgxKQpoZWFkKHRlc3RfZGF0YSkKYGBgCgoKCmBgYHtyfQpub3JtYWxpemUgPC0gZnVuY3Rpb24oeCkgewogIHJldHVybiAoKHggLSBtaW4oeCkp
IC8gKG1heCh4KSAtIG1pbih4KSkpCn0KYGBgCgoKYGBge3J9CnRyYWluaW5nX2RhdGFfcHAgPC0gYXMuZGF0YS5mcmFtZShsYXBwbHkodHJhaW5pbmdfZGF0YSwgbm9ybWFsaXplKSkKdGVzdF9kYXRhX3BwIDwtIGFzLmRhdGEuZnJhbWUobGFwcGx5KHRlc3RfZGF0YSwgbm9ybWFsaXplKSkKCmBgYAoKCgoKIyMgTmV1cmFsIE5ldHdvcmsgQ2xhc3NpZmllcgpgYGB7cn0KbGlicmFyeSgnbmV1cmFsbmV0JykKYGBgCgoKCmBgYHtyfQpubiA8LSBuZXVyYWxuZXQoY2xhc3MgfiAuLCBkYXRhPXRyYWluaW5nX2RhdGFfcHAsIGhpZGRlbj1jKDMyLDE2LDgsNCksIGxpbmVhci5vdXRwdXQ9RkFMU0UsIHRocmVzaG9sZD0wLjAxKQpwbG90KG5uKQpgYGAKCgpgYGB7cn0Kbm4ucmVzdWx0cyA8LSBjb21wdXRlKG5uLCB0ZXN0X2RhdGFfcHApCnJlc3VsdHMgPC0gZGF0YS5mcmFtZShhY3R1YWwgPSB0ZXN0X2RhdGFfcHAkY2xhc3MsIHByZWRpY3Rpb24gPSBubi5yZXN1bHRzJG5ldC5yZXN1bHQpCnJlc3VsdHMKYGBgCgpgYGB7cn0Kcm91bmRlZHJlc3VsdHM8LXNhcHBseShyZXN1bHRzLHJvdW5kLGRpZ2l0cz0wKQpyb3VuZGVkcmVzdWx0c2RmPWRhdGEuZnJhbWUocm91bmRlZHJlc3VsdHMpCmF0dGFjaChyb3VuZGVkcmVzdWx0c2RmKQp0YWJsZShhY3R1YWwscHJlZGljdGlvbikKYGBgCgoKYGBge3J9CmBgYAoKCmBgYHtyfQpgYGAKCgpgYGB7cn0KYGBgCgo=